8.1 Dependencies

Code
library(dplyr)
library(readr)
library(report) # part of {easystats}
library(see) # part of {easystats}
library(parameters) # part of {easystats}
library(correlation) # part of {easystats}
library(effectsize) # part of {easystats}
library(performance) # part of {easystats}
library(janitor)
library(lme4)
library(knitr)
library(kableExtra)

8.2 Inference tests

8.2.1 Regressions

Code
# fit model
model <- lm(wt ~ 1 + am + mpg, data = mtcars)

# report - text output (nb omits intercept!)
report(model)
We fitted a linear model (estimated using OLS) to predict wt with am and mpg
(formula: wt ~ 1 + am + mpg). The model explains a statistically significant
and substantial proportion of variance (R2 = 0.80, F(2, 29) = 57.66, p < .001,
adj. R2 = 0.79). The model's intercept, corresponding to am = 0 and mpg = 0, is
at 5.74 (95% CI [5.11, 6.36], t(29) = 18.64, p < .001). Within this model:

  - The effect of am is statistically significant and negative (beta = -0.53, 95%
CI [-0.94, -0.11], t(29) = -2.58, p = 0.015; Std. beta = -0.27, 95% CI [-0.48,
-0.06])
  - The effect of mpg is statistically significant and negative (beta = -0.11,
95% CI [-0.15, -0.08], t(29) = -6.79, p < .001; Std. beta = -0.71, 95% CI
[-0.92, -0.49])

Standardized parameters were obtained by fitting the model on a standardized
version of the dataset. 95% Confidence Intervals (CIs) and p-values were
computed using a Wald t-distribution approximation.
Code
# each parameter (including intercept)
report_parameters(model)
  - The intercept is statistically significant and positive (beta = 5.74, 95% CI [5.11, 6.36], t(29) = 18.64, p < .001; Std. beta = 1.10e-16, 95% CI [-0.17, 0.17])
  - The effect of am is statistically significant and negative (beta = -0.53, 95% CI [-0.94, -0.11], t(29) = -2.58, p = 0.015; Std. beta = -0.27, 95% CI [-0.48, -0.06])
  - The effect of mpg is statistically significant and negative (beta = -0.11, 95% CI [-0.15, -0.08], t(29) = -6.79, p < .001; Std. beta = -0.71, 95% CI [-0.92, -0.49])
Code
# just parameters in text format
report_statistics(model)
beta = 5.74, 95% CI [5.11, 6.36], t(29) = 18.64, p < .001; Std. beta = 1.10e-16, 95% CI [-0.17, 0.17]
beta = -0.53, 95% CI [-0.94, -0.11], t(29) = -2.58, p = 0.015; Std. beta = -0.27, 95% CI [-0.48, -0.06]
beta = -0.11, 95% CI [-0.15, -0.08], t(29) = -6.79, p < .001; Std. beta = -0.71, 95% CI [-0.92, -0.49]
Code
# just parameters in table format
parameters(model)
Parameter   | Coefficient |   SE |         95% CI | t(29) |      p
------------------------------------------------------------------
(Intercept) |        5.74 | 0.31 | [ 5.11,  6.36] | 18.64 | < .001
am          |       -0.53 | 0.20 | [-0.94, -0.11] | -2.58 | 0.015 
mpg         |       -0.11 | 0.02 | [-0.15, -0.08] | -6.79 | < .001
Code
# just parameters in table html format 
parameters(model) |>
  mutate(p = insight::format_p(p)) |>
  mutate_if(is.numeric, round_half_up, digits = 2) |>
  kable() |>
  kable_classic(full_width = FALSE)
Parameter Coefficient SE CI CI_low CI_high t df_error p
(Intercept) 5.74 0.31 0.95 5.11 6.36 18.64 29 p < .001
am -0.53 0.20 0.95 -0.94 -0.11 -2.58 29 p = 0.015
mpg -0.11 0.02 0.95 -0.15 -0.08 -6.79 29 p < .001
Code
# what if i just want some of these cols?
parameters(model) |>
  as.data.frame() |>
  mutate(p = insight::format_p(p)) |>
  select(r = Coefficient, ci_lower = CI_low, ci_upper = CI_high, p) |>
  mutate_if(is.numeric, round_half_up, digits = 2)
      r ci_lower ci_upper         p
1  5.74     5.11     6.36  p < .001
2 -0.53    -0.94    -0.11 p = 0.015
3 -0.11    -0.15    -0.08  p < .001
Code
# table in markdown format
report_table(model)
Parameter   | Coefficient |         95% CI | t(29) |      p | Std. Coef.
------------------------------------------------------------------------
(Intercept) |        5.74 | [ 5.11,  6.36] | 18.64 | < .001 |   1.10e-16
am          |       -0.53 | [-0.94, -0.11] | -2.58 | 0.015  |      -0.27
mpg         |       -0.11 | [-0.15, -0.08] | -6.79 | < .001 |      -0.71
            |             |                |       |        |           
AIC         |             |                |       |        |           
AICc        |             |                |       |        |           
BIC         |             |                |       |        |           
R2          |             |                |       |        |           
R2 (adj.)   |             |                |       |        |           
Sigma       |             |                |       |        |           

Parameter   | Std. Coef. 95% CI |   Fit
---------------------------------------
(Intercept) |    [-0.17,  0.17] |      
am          |    [-0.48, -0.06] |      
mpg         |    [-0.92, -0.49] |      
            |                   |      
AIC         |                   | 45.05
AICc        |                   | 46.53
BIC         |                   | 50.91
R2          |                   |  0.80
R2 (adj.)   |                   |  0.79
Sigma       |                   |  0.45
Code
# table in html format - needs to be rounded manually
report_table(model) |>
  mutate(p = insight::format_p(p)) |>
  mutate_if(is.numeric, round_half_up, digits = 2) |>
  kable() |>
  kable_classic(full_width = FALSE)
Parameter Coefficient CI CI_low CI_high t df_error p Std_Coefficient Std_Coefficient_CI_low Std_Coefficient_CI_high Fit
1 (Intercept) 5.74 0.95 5.11 6.36 18.64 29 p < .001 0.00 -0.17 0.17 NA
2 am -0.53 0.95 -0.94 -0.11 -2.58 29 p = 0.015 -0.27 -0.48 -0.06 NA
3 mpg -0.11 0.95 -0.15 -0.08 -6.79 29 p < .001 -0.71 -0.92 -0.49 NA
4 NA NA NA NA NA NA NA NA NA NA NA
5 AIC NA NA NA NA NA NA NA NA NA 45.05
6 AICc NA NA NA NA NA NA NA NA NA 46.53
7 BIC NA NA NA NA NA NA NA NA NA 50.91
8 R2 NA NA NA NA NA NA NA NA NA 0.80
9 R2 (adj.) NA NA NA NA NA NA NA NA NA 0.79
11 Sigma NA NA NA NA NA NA NA NA NA 0.45
Code
# plot
parameters(model) |>
  plot() 

8.2.2 Correlations

8.2.2.1 Single correlation tests

Code
# fit model
model <- cor.test(mtcars$mpg, mtcars$wt)

# report - text output 
report(model)
Effect sizes were labelled following Funder's (2019) recommendations.

The Pearson's product-moment correlation between mtcars$mpg and mtcars$wt is
negative, statistically significant, and very large (r = -0.87, 95% CI [-0.93,
-0.74], t(30) = -9.56, p < .001)
Code
# table in html format - needs to be rounded manually
report_table(model) |>
  mutate(p = insight::format_p(p)) |>
  mutate_if(is.numeric, round_half_up, digits = 2) |>
  kable() |>
  kable_classic(full_width = FALSE)
Parameter1 Parameter2 r CI CI_low CI_high t df_error p Method Alternative
mtcars$mpg mtcars$wt -0.87 0.95 -0.93 -0.74 -9.56 30 p < .001 Pearson's product-moment correlation two.sided

8.2.2.2 Many

Code
results <- correlation(iris)

results
# Correlation Matrix (pearson-method)

Parameter1   |   Parameter2 |     r |         95% CI | t(148) |         p
-------------------------------------------------------------------------
Sepal.Length |  Sepal.Width | -0.12 | [-0.27,  0.04] |  -1.44 | 0.152    
Sepal.Length | Petal.Length |  0.87 | [ 0.83,  0.91] |  21.65 | < .001***
Sepal.Length |  Petal.Width |  0.82 | [ 0.76,  0.86] |  17.30 | < .001***
Sepal.Width  | Petal.Length | -0.43 | [-0.55, -0.29] |  -5.77 | < .001***
Sepal.Width  |  Petal.Width | -0.37 | [-0.50, -0.22] |  -4.79 | < .001***
Petal.Length |  Petal.Width |  0.96 | [ 0.95,  0.97] |  43.39 | < .001***

p-value adjustment method: Holm (1979)
Observations: 150
Code
results %>%
  summary(redundant = TRUE) %>%
  plot()

8.2.2.3 By group

Code
iris %>%
  select(Species, Sepal.Length, Sepal.Width, Petal.Width) %>%
  group_by(Species) %>%
  correlation()
# Correlation Matrix (pearson-method)

Group      |   Parameter1 |  Parameter2 |    r |        95% CI | t(48) |         p
----------------------------------------------------------------------------------
setosa     | Sepal.Length | Sepal.Width | 0.74 | [ 0.59, 0.85] |  7.68 | < .001***
setosa     | Sepal.Length | Petal.Width | 0.28 | [ 0.00, 0.52] |  2.01 | 0.101    
setosa     |  Sepal.Width | Petal.Width | 0.23 | [-0.05, 0.48] |  1.66 | 0.104    
versicolor | Sepal.Length | Sepal.Width | 0.53 | [ 0.29, 0.70] |  4.28 | < .001***
versicolor | Sepal.Length | Petal.Width | 0.55 | [ 0.32, 0.72] |  4.52 | < .001***
versicolor |  Sepal.Width | Petal.Width | 0.66 | [ 0.47, 0.80] |  6.15 | < .001***
virginica  | Sepal.Length | Sepal.Width | 0.46 | [ 0.20, 0.65] |  3.56 | 0.002**  
virginica  | Sepal.Length | Petal.Width | 0.28 | [ 0.00, 0.52] |  2.03 | 0.048*   
virginica  |  Sepal.Width | Petal.Width | 0.54 | [ 0.31, 0.71] |  4.42 | < .001***

p-value adjustment method: Holm (1979)
Observations: 50

8.2.3 t-tests

NB Cohen’s d is approximated - better to calculate it separately and accurately.

Code
# fit model
model <- t.test(mpg ~ am, data = mtcars)

# report - text output 
report(model)
Effect sizes were labelled following Cohen's (1988) recommendations.

The Welch Two Sample t-test testing the difference of mpg by am (mean in group
0 = 17.15, mean in group 1 = 24.39) suggests that the effect is negative,
statistically significant, and large (difference = -7.24, 95% CI [-11.28,
-3.21], t(18.33) = -3.77, p = 0.001; Cohen's d = -1.76, 95% CI [-2.82, -0.67])
Code
# table in html format - needs to be rounded manually
report_table(model) |>
  mutate(p = insight::format_p(p)) |>
  mutate_if(is.numeric, round_half_up, digits = 2) |>
  kable() |>
  kable_classic(full_width = FALSE)
Parameter Group Mean_Group1 Mean_Group2 Difference CI CI_low CI_high t df_error p Method Alternative d d_CI_low d_CI_high
mpg am 17.15 24.39 -7.24 0.95 -11.28 -3.21 -3.77 18.33 p = 0.001 Welch Two Sample t-test two.sided -1.76 -2.82 -0.67
Code
# estimate Cohen's d directly from data
cohens_d(mpg ~ am, data = mtcars)
Cohen's d |         95% CI
--------------------------
-1.48     | [-2.27, -0.67]

- Estimated using pooled SD.

8.2.4 Multilevel/hierarchical/mixed models

Code
# fit model
model <- lmer(Reaction ~ Days + (Days | Subject), sleepstudy)

# parameters in text format 
report(model)
We fitted a linear mixed model (estimated using REML and nloptwrap optimizer)
to predict Reaction with Days (formula: Reaction ~ Days). The model included
Days as random effects (formula: ~Days | Subject). The model's total
explanatory power is substantial (conditional R2 = 0.80) and the part related
to the fixed effects alone (marginal R2) is of 0.28. The model's intercept,
corresponding to Days = 0, is at 251.41 (95% CI [237.94, 264.87], t(174) =
36.84, p < .001). Within this model:

  - The effect of Days is statistically significant and positive (beta = 10.47,
95% CI [7.42, 13.52], t(174) = 6.77, p < .001; Std. beta = 0.54, 95% CI [0.38,
0.69])

Standardized parameters were obtained by fitting the model on a standardized
version of the dataset. 95% Confidence Intervals (CIs) and p-values were
computed using a Wald t-distribution approximation.
Code
# parameters in table format
parameters(model) |>
  mutate(p = insight::format_p(p)) |>
  mutate_if(is.numeric, round_half_up, digits = 2) |>
  kable() |>
  kable_classic(full_width = FALSE)
Parameter Coefficient SE CI CI_low CI_high t df_error p Effects Group
(Intercept) 251.41 6.82 0.95 237.94 264.87 36.84 174 p < .001 fixed
Days 10.47 1.55 0.95 7.42 13.52 6.77 174 p < .001 fixed
SD (Intercept) 24.74 NA 0.95 NA NA NA NA random Subject
SD (Days) 5.92 NA 0.95 NA NA NA NA random Subject
Cor (Intercept~Days) 0.07 NA 0.95 NA NA NA NA random Subject
SD (Observations) 25.59 NA 0.95 NA NA NA NA random Residual
Code
# table in html format - needs to be rounded manually
report_table(model) |>
  mutate(p = insight::format_p(p)) |>
  mutate_if(is.numeric, round_half_up, digits = 2) |>
  kable() |>
  kable_classic(full_width = FALSE)
Parameter Coefficient CI CI_low CI_high t df_error p Effects Group Std_Coefficient Std_Coefficient_CI_low Std_Coefficient_CI_high Fit
1 (Intercept) 251.41 0.95 237.94 264.87 36.84 174 p < .001 fixed 0.00 -0.32 0.32 NA
2 Days 10.47 0.95 7.42 13.52 6.77 174 p < .001 fixed 0.54 0.38 0.69 NA
3 NA 24.74 0.95 NA NA NA NA random Subject NA NA NA NA
4 NA 5.92 0.95 NA NA NA NA random Subject NA NA NA NA
5 NA 0.07 0.95 NA NA NA NA random Subject NA NA NA NA
6 NA 25.59 0.95 NA NA NA NA random Residual NA NA NA NA
7 NA NA NA NA NA NA NA NA NA NA NA NA NA
8 AIC NA NA NA NA NA NA NA NA NA NA NA 1755.63
9 AICc NA NA NA NA NA NA NA NA NA NA NA 1756.11
10 BIC NA NA NA NA NA NA NA NA NA NA NA 1774.79
11 R2 (conditional) NA NA NA NA NA NA NA NA NA NA NA 0.80
12 R2 (marginal) NA NA NA NA NA NA NA NA NA NA NA 0.28
15 Sigma NA NA NA NA NA NA NA NA NA NA NA 25.59
Code
# plot
parameters(model) |>
  plot() 

Code
# check assumptions of random effects
result <- check_normality(model, effects = "random")
plot(result)
[[1]]

8.2.5 ANOVAs

Code
# fit model
model <- aov(mpg ~ factor(gear) + factor(carb), data = mtcars)

# commonly used effect size: partial eta squared
eta_squared(model)
# Effect Size for ANOVA (Type I)

Parameter    | Eta2 (partial) |       95% CI
--------------------------------------------
factor(gear) |           0.69 | [0.49, 1.00]
factor(carb) |           0.66 | [0.41, 1.00]

- One-sided CIs: upper bound fixed at [1.00].
Code
# better effect size: partialomega squared
omega_squared(model)
# Effect Size for ANOVA (Type I)

Parameter    | Omega2 (partial) |       95% CI
----------------------------------------------
factor(gear) |             0.62 | [0.38, 1.00]
factor(carb) |             0.57 | [0.26, 1.00]

- One-sided CIs: upper bound fixed at [1.00].

8.3 Summary statistics

Code
# all columns
iris |>
  group_by(Species) |>
  report_table() 
Group      |     Variable | n_Obs | Mean |   SD | Median |  MAD |  Min |  Max
-----------------------------------------------------------------------------
setosa     | Sepal.Length |    50 | 5.01 | 0.35 |   5.00 | 0.30 | 4.30 | 5.80
setosa     |  Sepal.Width |    50 | 3.43 | 0.38 |   3.40 | 0.37 | 2.30 | 4.40
setosa     | Petal.Length |    50 | 1.46 | 0.17 |   1.50 | 0.15 | 1.00 | 1.90
setosa     |  Petal.Width |    50 | 0.25 | 0.11 |   0.20 | 0.00 | 0.10 | 0.60
versicolor | Sepal.Length |    50 | 5.94 | 0.52 |   5.90 | 0.52 | 4.90 | 7.00
versicolor |  Sepal.Width |    50 | 2.77 | 0.31 |   2.80 | 0.30 | 2.00 | 3.40
versicolor | Petal.Length |    50 | 4.26 | 0.47 |   4.35 | 0.52 | 3.00 | 5.10
versicolor |  Petal.Width |    50 | 1.33 | 0.20 |   1.30 | 0.22 | 1.00 | 1.80
virginica  | Sepal.Length |    50 | 6.59 | 0.64 |   6.50 | 0.59 | 4.90 | 7.90
virginica  |  Sepal.Width |    50 | 2.97 | 0.32 |   3.00 | 0.30 | 2.20 | 3.80
virginica  | Petal.Length |    50 | 5.55 | 0.55 |   5.55 | 0.67 | 4.50 | 6.90
virginica  |  Petal.Width |    50 | 2.03 | 0.27 |   2.00 | 0.30 | 1.40 | 2.50

Group      | Skewness | Kurtosis | n_Missing
--------------------------------------------
setosa     |     0.12 |    -0.25 |         0
setosa     |     0.04 |     0.95 |         0
setosa     |     0.11 |     1.02 |         0
setosa     |     1.25 |     1.72 |         0
versicolor |     0.11 |    -0.53 |         0
versicolor |    -0.36 |    -0.37 |         0
versicolor |    -0.61 |     0.05 |         0
versicolor |    -0.03 |    -0.41 |         0
virginica  |     0.12 |     0.03 |         0
virginica  |     0.37 |     0.71 |         0
virginica  |     0.55 |    -0.15 |         0
virginica  |    -0.13 |    -0.60 |         0
Code
# all columns - html output with rounding
iris |>
  group_by(Species) |>
  report_table() |>
  mutate_if(is.numeric, round_half_up, digits = 2) |>
  kable() |>
  kable_classic(full_width = FALSE)
Group Variable n_Obs Mean SD Median MAD Min Max Skewness Kurtosis n_Missing
setosa Sepal.Length 50 5.01 0.35 5.00 0.30 4.3 5.8 0.12 -0.25 0
setosa Sepal.Width 50 3.43 0.38 3.40 0.37 2.3 4.4 0.04 0.95 0
setosa Petal.Length 50 1.46 0.17 1.50 0.15 1.0 1.9 0.11 1.02 0
setosa Petal.Width 50 0.25 0.11 0.20 0.00 0.1 0.6 1.25 1.72 0
versicolor Sepal.Length 50 5.94 0.52 5.90 0.52 4.9 7.0 0.11 -0.53 0
versicolor Sepal.Width 50 2.77 0.31 2.80 0.30 2.0 3.4 -0.36 -0.37 0
versicolor Petal.Length 50 4.26 0.47 4.35 0.52 3.0 5.1 -0.61 0.05 0
versicolor Petal.Width 50 1.33 0.20 1.30 0.22 1.0 1.8 -0.03 -0.41 0
virginica Sepal.Length 50 6.59 0.64 6.50 0.59 4.9 7.9 0.12 0.03 0
virginica Sepal.Width 50 2.97 0.32 3.00 0.30 2.2 3.8 0.37 0.71 0
virginica Petal.Length 50 5.55 0.55 5.55 0.67 4.5 6.9 0.55 -0.15 0
virginica Petal.Width 50 2.03 0.27 2.00 0.30 1.4 2.5 -0.13 -0.60 0
Code
# subset of columns
iris |>
  group_by(Species) |>
  report_table() |>
  select(Group, Variable, n_Obs, Mean, SD) |>
  mutate_if(is.numeric, round_half_up, digits = 2) |>
  kable() |>
  kable_classic(full_width = FALSE)
Group Variable n_Obs Mean SD
setosa Sepal.Length 50 5.01 0.35
setosa Sepal.Width 50 3.43 0.38
setosa Petal.Length 50 1.46 0.17
setosa Petal.Width 50 0.25 0.11
versicolor Sepal.Length 50 5.94 0.52
versicolor Sepal.Width 50 2.77 0.31
versicolor Petal.Length 50 4.26 0.47
versicolor Petal.Width 50 1.33 0.20
virginica Sepal.Length 50 6.59 0.64
virginica Sepal.Width 50 2.97 0.32
virginica Petal.Length 50 5.55 0.55
virginica Petal.Width 50 2.03 0.27

8.4 Assumption checks

Beware that checking assumptions can lead to as many bad practices as it does good ones! (e.g., poorly justified post hoc outlier exclusion)

8.4.1 Multiple checks at once

Code
# fit model
model <- lm(wt ~ 1 + am + mpg, data = mtcars)

# check multiple model assumptions
check_model(model)

8.4.2 Normality of distribution of residuals

Code
res_normality <- check_normality(model)

res_normality
Warning: Non-normality of residuals detected (p = 0.013).
Code
plot(res_normality, type = "qq")

Code
plot(res_normality, type = "density")

8.4.3 Multicolinearity

Code
res_collinearity <- check_collinearity(model)

res_collinearity
# Check for Multicollinearity

Low Correlation

 Term  VIF   VIF 95% CI Increased SE Tolerance Tolerance 95% CI
   am 1.56 [1.19, 2.68]         1.25      0.64     [0.37, 0.84]
  mpg 1.56 [1.19, 2.68]         1.25      0.64     [0.37, 0.84]
Code
plot(res_collinearity)

8.4.4 Outliers

Code
res_outliers <- check_outliers(model, method = "cook") # "all" requires other dependencies and can take some time to run  
#res_outliers <- check_outliers(model, method = "all") # "all" requires other dependencies and can take some time to run  

res_outliers
OK: No outliers detected.
- Based on the following method and threshold: cook (0.808).
- For variable: (Whole model)
Code
plot(res_outliers)

8.4.5 Heteroscedasticity

Code
res_het <- check_heteroscedasticity(model)

res_het
OK: Error variance appears to be homoscedastic (p = 0.053).
Code
plot(res_het)